# load packages
library(tidyverse)
library(gapminder)
library(plotly)
library(corrplot)
library(GGally)
library(crosstalk)
library(DT)
library(ggforce)Intereactive Plots - STARTER
ggplotly()
Building simple interactive plots
ggplot(data = diamonds,
aes(x = cut)) +
geom_bar()Interactivity for other plots
p <- ggplot(data = diamonds,
aes(x = cut,
fill = clarity)) +
geom_bar(position = "dodge")
ggplotly(p)p <- ggplot(data = diamonds,
aes(x = price)) +
geom_histogram() +
facet_grid(cut ~ .,
scales = "free_y")
ggplotly(p)`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
p <- ggplot(data = diamonds,
aes(x = cut,
y = price)) +
geom_boxplot()
ggplotly(p)Exercise
- Using the code chunk below, create a proportionally stacked bar chart of
pricebyclarityusing thediamondsdataset, then add interactivity. Does all of the interactivity features work well with this plot type?
New plots with interactivity
p <- ggplot(data = diamonds,
aes(x = log(carat),
y = log(price))) +
geom_point()
ggplotly(p)p <- ggplot(data = diamonds,
aes(x = log(carat),
y = log(price))) +
geom_point()
ggplotly(p)Application
?gapminder
head(gapminder)# A tibble: 6 × 6
country continent year lifeExp pop gdpPercap
<fct> <fct> <int> <dbl> <int> <dbl>
1 Afghanistan Asia 1952 28.8 8425333 779.
2 Afghanistan Asia 1957 30.3 9240934 821.
3 Afghanistan Asia 1962 32.0 10267083 853.
4 Afghanistan Asia 1967 34.0 11537966 836.
5 Afghanistan Asia 1972 36.1 13079460 740.
6 Afghanistan Asia 1977 38.4 14880372 786.
Rebuilding plots with plot_ly()
plot_ly() basics
Building plotly objects
layout(
plot_ly(diamonds, x = ~cut),
title = "My beatiful histogram"
)No trace type specified:
Based on info supplied, a 'histogram' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#histogram
plot_ly(diamonds, x = ~cut) %>% layout(title = "My beatiful histogram")No trace type specified:
Based on info supplied, a 'histogram' trace seems appropriate.
Read more about this trace type -> https://plotly.com/r/reference/#histogram
diamonds %>%
plot_ly() %>%
add_histogram(x = ~cut)Common plotly plots
Bars and histograms
# preview data
mtcars %>% tibble::rownames_to_column(var = "model") model mpg cyl disp hp drat wt qsec vs am gear carb
1 Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
2 Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
3 Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
4 Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
5 Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
6 Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
7 Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
8 Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
9 Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
10 Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
11 Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
12 Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
13 Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
14 Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
15 Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
16 Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
17 Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
18 Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
19 Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
20 Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
21 Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
22 Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
23 AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
24 Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
25 Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
26 Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
27 Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
28 Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
29 Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
30 Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
31 Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
32 Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
Exercise
- Using the code chunk below and the
gapminderdataset, create a bar graph of the number of countries per continent in only the first year of data collection. Can you create this bar graph two different ways?
CHALLENGE: Polish this plot by sorting by descending frequency, adding data labels on top of the bars, adding an informative title and hiding the legend.
Boxplots and schema()
diamonds %>%
plot_ly(x = ~price,
y = ~cut) %>%
add_boxplot(boxpoints = FALSE)Exercise
- Using the code chunk below and the
irisdataset, create comparative boxplots ofSepal.Widthfor eachSpecies, sorted by descending mean.
Scatterplots
Application
Exercise
Using the code chunk below and the
irisdataset, create two scatterplots ofSepal.WidthbySepal.Length:Scatterplot 1: The color of every point is green, and the mouse over info also displays the
Species.Scatterplot 2: Color each point by
Species, except we want to the colors to be as follows: setosa = darkgreen, versicolor = green, virginica = grey.
Line plots
data_sun <- data.frame(year = c(1700:1988),
sunspots = as.vector(sunspot.year))head(economics)# A tibble: 6 × 6
date pce pop psavert uempmed unemploy
<date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1967-07-01 507. 198712 12.6 4.5 2944
2 1967-08-01 510. 198911 12.6 4.7 2945
3 1967-09-01 516. 199113 11.9 4.6 2958
4 1967-10-01 512. 199311 12.9 4.9 3143
5 1967-11-01 517. 199498 12.8 4.7 3066
6 1967-12-01 525. 199657 11.8 4.8 3018
econ <- economics %>%
mutate(year = year(date),
month = month(date))econ %>%
group_by(year) %>%
plot_ly(x = ~month,
y = ~unemploy) %>%
add_lines(color = ~ordered(year))Application
gapminder %>%
group_by(country) %>%
plot_ly(x = ~year, y = ~lifeExp) %>%
add_lines(color = ~continent)gapminder %>%
group_by(country) %>%
plot_ly(x = ~year, y = ~lifeExp) %>%
add_lines(color = ~continent)Other types of plotly plots
2D histogram and heatmap
corr <- diamonds %>%
select(where(is.numeric)) %>%
cor
corrplot::corrplot(corr)corr %>%
data.frame %>%
plot_ly(x = rownames(corr), y = colnames(corr), z = corr) %>%
add_heatmap(colors = "RdBu") %>%
colorbar(limits = c(-1, 1))Exercise
Using the code chunk below, create the following graphs:
An interactive 2D histogram for
Petal.WidthandPetal.Lengthfrom theirisdataset. What other type of plot can we make to display two quantitative variables that may be a better choice for this data?An interactive heatmap for
colorbyclarityfrom thediamondsdataset. Note that the best way to do this is to let plotly guess the plot type when supplying two categorical variables toxandy.
Slope graphs and dumbell charts
gapminder %>%
filter(year %in% c(min(year), max(year))) %>%
summarize(.by = c(continent, year),
avg_lifeExp = round(mean(lifeExp), 1)) %>%
pivot_wider(names_from = year,
values_from = avg_lifeExp,
names_prefix = "year_")
# add plotly code
layout(title = "Gapminder average life expectancy",
xaxis = list(ticktext = c("1952", "2007"),
tickvals = c(1, 2),
zeroline = FALSE),
yaxis = list(title = "",
showgrid = FALSE,
showticks = FALSE,
showticklabels = FALSE))head(mpg)# A tibble: 6 × 11
manufacturer model displ year cyl trans drv cty hwy fl class
<chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa…
2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa…
3 audi a4 2 2008 4 manual(m6) f 20 31 p compa…
4 audi a4 2 2008 4 auto(av) f 21 30 p compa…
5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa…
6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa…
# create summary data of mean mpg by model
# then create dumbell chart with segments and points
# -> manually specify color legend
mpg %>%
summarize(.by = model,
across(c(cty, hwy), mean)) %>%
mutate(model = fct_reorder(model, cty)) %>%
ggplot() +
geom_segment(aes(x = cty,
xend = hwy,
y = model,
yend = model),
color = "grey") +
geom_point(aes(x = cty,
y = model,
color = "blue")) +
geom_point(aes(x = hwy,
y = model,
color = "orange")) +
scale_color_manual(name = "MPG",
values = c("blue", "orange"),
labels = c("city", "hwy")) +
theme_bw()mpg %>%
summarize(.by = model,
across(c(cty, hwy), mean)) %>%
mutate(model = fct_reorder(model, cty)) # A tibble: 38 × 3
model cty hwy
<fct> <dbl> <dbl>
1 a4 18.9 28.3
2 a4 quattro 17.1 25.8
3 a6 quattro 16 24
4 c1500 suburban 2wd 12.8 17.8
5 corvette 15.4 24.8
6 k1500 tahoe 4wd 12.5 16.2
7 malibu 18.8 27.6
8 caravan 2wd 15.8 22.4
9 dakota pickup 4wd 12.8 17
10 durango 4wd 11.9 16
# ℹ 28 more rows
Exercise
Using the code chunk below, create the following graphs:
An interactive slopegraph using the
mpgdataset forctyvshwygas mileage bymodel. Use the same summarizing code as for the dumbell chart (we need wide summary data). What is a problem we have to consider with this type of plot?An interactive dumbell plot using the mean
lifeExpbycontinentfrom thegapminderdataset. Start with the same summarizing code as for the slopegraph (we need wide summary data again). Be sure to order the levels ofcontinentby increasing mean for the minimum year.
Parallel coordinates plot
# create parallel coordinate plot using default options
iris %>%
ggparcoord(columns = 1:4,
groupColumn = 5,
scale = "uniminmax",
order = "anyClass",
alphaLines = 0.5) +
theme_bw()# confirm trends with correlation matrix
cor(select(iris, where(is.numeric))) %>% round(3) Sepal.Length Sepal.Width Petal.Length Petal.Width
Sepal.Length 1.000 -0.118 0.872 0.818
Sepal.Width -0.118 1.000 -0.428 -0.366
Petal.Length 0.872 -0.428 1.000 0.963
Petal.Width 0.818 -0.366 0.963 1.000
Graphical queries
Basic graphical queries
mtcars %>%
plot_ly(x = ~wt,
y = ~mpg) %>%
add_markers() %>%
add_text(text = ~cyl,
textposition = "top")Linked brushing
mtcars %>%
highlight_key(~cyl) %>%
plot_ly(x = ~wt,
y = ~mpg) %>%
add_markers() %>%
add_text(text = ~cyl,
textposition = "top") %>%
highlight(on = "plotly_hover")Setting the `off` event (i.e., 'plotly_doubleclick') to match the `on` event (i.e., 'plotly_hover'). You can change this default via the `highlight()` function.
Application
shared_data <- highlight_key(mpg)
p <- shared_data %>%
plot_ly(x = ~displ,
y = ~hwy) %>%
add_markers() %>%
highlight(on = "plotly_selected")
bscols(p, datatable(shared_data, height = 500))Setting the `off` event (i.e., 'plotly_deselect') to match the `on` event (i.e., 'plotly_selected'). You can change this default via the `highlight()` function.
# plot two layers
# -> one of all points with grey color
# -> another with just points of interest in a different color
# -> add legend with informative values
ggplot() +
geom_point(aes(x = displ,
y = hwy,
color = "Other"),
data = mpg) +
geom_point(aes(x = displ,
y = hwy,
color = "Corvette"),
data = filter(mpg, model == "corvette")) +
scale_color_manual(values = c("Other" = "grey", "Corvette" = "red"),
name = "Model") +
labs(title = "Fuel economy from 1999 to 2008 for 38 car models",
caption = "Source: https://fueleconomy.gov/",
x = "Engine Displacement",
y = "Miles Per Gallon") +
theme_bw() ggplot(data = mpg) +
geom_point(aes(x = displ,
y = hwy)) +
labs(title = "Fuel economy from 1999 to 2008 for 38 car models",
caption = "Source: https://fueleconomy.gov/",
x = "Engine Displacement",
y = "Miles Per Gallon") +
theme_bw()# show hull with colored points to point out caution when using this technique
ggplot() +
geom_point(aes(x = displ,
y = hwy),
data = mpg) +
geom_point(aes(x = displ,
y = hwy,
color = "a4"),
data = filter(mpg, model == "a4")) +
geom_mark_hull(aes(x = displ,
y = hwy,
filter = model == "a4",
label = model),
data = mpg) +
scale_color_manual(values = c("a4" = "red"),
name = "Model") +
theme_bw()More graphical queries
gapminder %>%
group_by(country) %>%
plot_ly(x = ~year,
y = ~lifeExp,
text = ~country) %>%
add_lines(color = ~continent)Exercise
Using the code chuck below, explore the
ggplot2::msleepdata.Create a linked brushing setup for a scatterplot of
brainwtbysleep_totaland the corresponding data table. Which points stand out? Which species are they?CHALLENGE: Recreate the scatterplot as a static image using ggplot2 and add annotations to the interesting species via
geom_mark_*()as if it were to be in the final published work. Add nicely formatted, informative labels and titles as well.
Linking multiple plots and subplot()
mtcars %>%
plot_ly(x = ~ordered(cyl)) %>%
add_histogram()mtcars %>%
plot_ly(x = ~wt,
y = ~mpg) %>%
add_markers() p <- plot_ly(diamonds,
y = ~price,
color = I("black"),
alpha = 0.1 )
p1 <- p %>% add_boxplot(x = "Overall")
p2 <- p %>% add_boxplot(x = ~cut)
subplot(p1, p2)p1 <- ggplot(data = diamonds,
aes(x = price,
color = cut)) +
geom_density() +
theme_bw()
p2 <- diamonds %>%
plot_ly() %>%
add_boxplot(x = ~price,
y = ~cut,
color = ~cut)shared_data <- highlight_key(diamonds)
p1 <- ggplot(data = shared_data,
aes(x = price,
color = cut)) +
geom_density() +
theme_bw()
p2 <- shared_data %>%
plot_ly() %>%
add_boxplot(x = ~price,
y = ~cut,
color = ~cut)
subplot(ggplotly(p1), p2,
nrows = 2,
shareX = TRUE) %>%
highlight(on = "plotly_click")Warning: Can only have one: config
Setting the `off` event (i.e., 'plotly_doubleclick') to match the `on` event (i.e., 'plotly_click'). You can change this default via the `highlight()` function.
Exercise
Using code chuck below and the starter code that filters and summarizes the
Lahman::Battingdata to team totals for the most current year then creates three density plots, do the following:CHALLENGE: Create an interactive parallel coordinates. Remember that we need long data for all of the numeric variables and we can group by
teamIDbecause that acts as the observation ID. What can we conclude from this plot, if anything?Combine these plots into a single view with
subplot(); however have the three density plots in the first row and the parallel coordinates plot in the second row.
# create team summarized batting data for the most recent year
batting <- Lahman::Batting %>%
filter(yearID == max(yearID)) %>%
select(-c(stint,G)) %>%
summarize(.by = c(teamID, yearID, lgID), across(c(where(is.numeric)), sum)) %>%
mutate(yearID = as.factor(yearID)) %>% # so year doesn't get rescaled in the parallel coordinates plot
select(where(is.factor), HR, RBI, SB) # just look at three important batting stats
# create three different density plots
p1 <- batting %>%
ggplot() +
geom_density(aes(x = HR,
color = lgID)) +
theme_bw()
p2 <- batting %>%
ggplot() +
geom_density(aes(x = RBI,
color = lgID)) +
theme_bw()
p3 <- batting %>%
ggplot() +
geom_density(aes(x = SB,
color = lgID)) +
theme_bw()
# create parallel coordinates plot
# organize plotsFilter events
Highlight vs filter
Creating a filtered event plot
# create shared data object
shared_data <- highlight_key(txhousing)
# create highlight plot from shared data object
p <- ggplot(data = shared_data) +
geom_line(aes(x = date,
y = median,
group = city))
# arrange select box for filtering shared data object and plot from same shared data objectExercise
- Using the code chunk below, modify / add to the code below to transform the static timeseries plot of the
gapminderdataset into an interactive filtered event plot.
ggplot(data = gapminder) +
geom_line(aes(x = year,
y = lifeExp,
group = country,
color = continent)) +
theme_bw()